package com.cheng.spider.extract.novel;

import com.cheng.spider.constant.MimeType;
import com.cheng.spider.extract.ExtractRule;
import com.cheng.spider.extract.Result;
import com.cheng.spider.extract.SaveRule;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * {@link NovelExtractor} implementation that reads the chapter body from the
 * {@code <p>} elements directly under {@code div#content}, uses the document
 * title as the chapter title, and reports a fixed output file name.
 */
public class SimpleNovelExtractor extends NovelExtractor {

    /** CSS query matching {@code <p>} elements that are direct children of {@code div#content}. */
    private static final String CONTENT_QUERY = "div#content > p";

    /** Fixed file name returned by {@link #getNovelFileName()} (the literal is Chinese for "test file"). */
    private static final String NOVEL_FILE_NAME = "测试文件.txt";

    /**
     * Ordered {target, replacement} pairs applied to the extracted HTML.
     *
     * <p>The order is significant: each pair is applied to the result of the
     * previous one. All replacements are literal (no regular expressions).
     *
     * <p>NOTE(review): because {@code "&amp;"} is removed before {@code "bsp;"},
     * an input of {@code "&amp;nbsp;"} is reduced to a lone {@code "n"}, and a
     * single {@code "&nbsp;"} (not in a run of four) is left untouched. Confirm
     * against real page content whether that is intended.
     */
    private static final String[][] CONTENT_REPLACEMENTS = {
            {"<br>", "\n\n"},                   // line break tag becomes a blank line
            {" ", ""},                          // strip every space character
            {"&nbsp;&nbsp;&nbsp;&nbsp;", ""},   // strip runs of four non-breaking-space entities
            {"&amp;", ""},                      // strip escaped ampersands
            {"bsp;", ""}                        // strip leftover "bsp;" fragments
    };

    /**
     * Extracts the chapter text from the given page.
     *
     * <p>Takes the HTML of the elements matched by {@link #CONTENT_QUERY} and
     * applies {@link #CONTENT_REPLACEMENTS} to it in declaration order.
     *
     * @param document parsed page to read the content from
     * @return the selected HTML after all replacements have been applied
     */
    @Override
    protected String extractContent(Document document) {
        String text = document.select(CONTENT_QUERY).html();
        for (String[] rule : CONTENT_REPLACEMENTS) {
            text = text.replace(rule[0], rule[1]);
        }
        return text;
    }

    /**
     * Extracts the title for the given page.
     *
     * @param document parsed page to read the title from
     * @return the value of {@code document.title()}
     */
    @Override
    protected String extractTitle(Document document) {
        final String pageTitle = document.title();
        return pageTitle;
    }

    /**
     * Returns the file name associated with this extractor.
     *
     * @return the constant {@link #NOVEL_FILE_NAME}
     */
    @Override
    protected String getNovelFileName() {
        return NOVEL_FILE_NAME;
    }
}
